This notebook examines the relationship between different features of the data for distinguishing viral from nonviral sequences.

Please reach out to James Riddell or Bridget Hegarty regarding any issues, or open an issue on GitHub.

library(ggplot2)
library(plyr)
library(reshape2)
library(viridis)
Loading required package: viridisLite
library(tidyr)

Attaching package: ‘tidyr’

The following object is masked from ‘package:reshape2’:

    smiths
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:plyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise, summarize

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(readr)
library(data.table)
data.table 1.14.0 using 1 threads (see ?getDTthreads).  Latest news: r-datatable.com
**********
This installation of data.table has not detected OpenMP support. It should still work but in single-threaded mode.
This is a Mac. Please read https://mac.r-project.org/openmp/. Please engage with Apple and ask them for support. Check r-datatable.com for updates, and our Mac instructions here: https://github.com/Rdatatable/data.table/wiki/Installation. After several years of many reports of installation problems on Mac, it's time to gingerly point out that there have been no similar problems on Windows or Linux.
**********

Attaching package: ‘data.table’

The following objects are masked from ‘package:dplyr’:

    between, first, last

The following objects are masked from ‘package:reshape2’:

    dcast, melt
# Read the tab-separated file viral_tools_combined.tsv into `viruses`.
# Column types are guessed by readr; the guessed specification is printed below.
viruses <- read_tsv("../IntermediaryFiles/viral_tools_combined.tsv")

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  seqtype = col_character(),
  contig = col_character(),
  checkv_provirus = col_character(),
  checkv_quality = col_character(),
  method.x = col_character(),
  Classified = col_character(),
  IDs_all = col_character(),
  Seq = col_character(),
  Kaiju_Viral = col_character(),
  Kingdom = col_character(),
  type = col_character(),
  vibrant_quality = col_character(),
  method.y = col_character(),
  vibrant_prophage = col_character(),
  vs2type = col_character(),
  max_score_group = col_character()
)
ℹ Use `spec()` for the full column specifications.
# List the columns available in `viruses`.
colnames(viruses)
 [1] "Index"                "seqtype"              "contig"               "checkv_provirus"      "checkv_completeness"  "checkv_contamination"
 [7] "checkv_viral_genes"   "checkv_host_genes"    "checkv_total_genes"   "checkv_length"        "checkv_quality"       "method.x"            
[13] "Classified"           "NCBI_taxon"           "len"                  "ID_best"              "IDs_all"              "Seq"                 
[19] "Kaiju_Viral"          "Kingdom"              "score"                "pvalue"               "bh_pvalue"            "type"                
[25] "vibrant_quality"      "method.y"             "vibrant_prophage"     "category"             "vs2type"              "dsDNAphage"          
[31] "ssDNA"                "NCLDV"                "RNA"                  "lavidaviridae"        "max_score"            "max_score_group"     
[37] "hallmark"             "viral"                "cellular"             "percent_host"         "percent_viral"        "percent_unknown"     
There were 25 warnings (use warnings() to see them)
# Hex-binned counts of the VS2 NCLDV score (x) against the VS2 viral score (y),
# one panel per sequence type; bin fill is the count on a log10 scale.
# Panels are split by `seqtype` because `confusion_matrix_high_MCC` is not a
# column of `viruses` (see the colnames() listing), and faceting on a variable
# that no layer contains makes ggplot2 fail instead of drawing the plot.
ggplot(viruses, aes(x = NCLDV, y = viral)) +
  geom_hex(bins = 30) +
  scale_fill_continuous(type = "viridis", trans = "log10") +
  theme_bw() +
  facet_wrap(~seqtype, scales = "free") +
  xlab("NCLDV VS2 Score") +
  ylab("VS2 Viral Score")

Important features by sequence type

# Palette function for the regular "Tableau 10" colours from ggthemes.
# NOTE(review): `pal` is not referenced by any code visible in this section —
# confirm it is still needed.
pal <- ggthemes::tableau_color_pal(palette="Tableau 10", type="regular")

# Hex-binned counts of CheckV host genes (x) against CheckV viral genes (y),
# drawn separately for each sequence type with free axes; bin fill is the
# count on a log10 scale.
ggplot(
  data = viruses,
  mapping = aes(y = checkv_viral_genes, x = checkv_host_genes)
) +
  geom_hex(bins = 30) +
  facet_wrap(~ seqtype, scales = "free") +
  scale_fill_continuous(trans = "log10", type = "viridis") +
  labs(x = "Number of Host Genes", y = "Number of Viral Genes") +
  theme_bw()

# Hex-binned counts of hallmark genes (x) against CheckV sequence length (y),
# drawn separately for each sequence type with free axes; bin fill is the
# count on a log10 scale.
ggplot(
  data = viruses,
  mapping = aes(y = checkv_length, x = hallmark)
) +
  geom_hex(bins = 30) +
  facet_wrap(~ seqtype, scales = "free") +
  scale_fill_continuous(trans = "log10", type = "viridis") +
  labs(x = "Number of Hallmark Genes", y = "Length of Sequence") +
  theme_bw()

# Cross-tabulate whether a sequence has at least 50 CheckV host genes (rows)
# against its sequence type (columns).
table(viruses$checkv_host_genes>=50, viruses$seqtype)
       
        archaea bacteria fungi plasmid protist virus
  FALSE    7542    47439  1146    3938    4993 10000
  TRUE     2399    17278   497    1052       7     0
# Hex-binned counts of CheckV sequence length (x) against CheckV completeness
# (y), drawn separately for each sequence type with free axes; bin fill is the
# count on a log10 scale.
ggplot(
  data = viruses,
  mapping = aes(y = checkv_completeness, x = checkv_length)
) +
  geom_hex(bins = 30) +
  facet_wrap(~ seqtype, scales = "free") +
  scale_fill_continuous(trans = "log10", type = "viridis") +
  labs(x = "Length", y = "Completeness") +
  theme_bw()

# Hex-binned counts of hallmark genes (x) against CheckV completeness (y),
# drawn separately for each sequence type with free axes; bin fill is the
# count on a log10 scale.
ggplot(
  data = viruses,
  mapping = aes(y = checkv_completeness, x = hallmark)
) +
  geom_hex(bins = 30) +
  facet_wrap(~ seqtype, scales = "free") +
  scale_fill_continuous(trans = "log10", type = "viridis") +
  labs(x = "Hallmark Genes", y = "Completeness") +
  theme_bw()

# Per sequence type: share of all sequences that have checkv_length > 50000
# and no hallmark genes. The element-wise division relies on both tables
# listing the same sequence types in the same order.
table(viruses$seqtype[viruses$checkv_length>50000 & viruses$hallmark==0])/table(viruses$seqtype)

  archaea  bacteria     fungi   plasmid   protist     virus 
0.3314556 0.2962900 0.5605600 0.4841683 0.0168000 0.0184000 
# Per sequence type: share of all sequences where three times the CheckV viral
# gene count is at most the host gene count and checkv_provirus is "No".
table(viruses$seqtype[((viruses$checkv_viral_genes*3) <= viruses$checkv_host_genes) & viruses$checkv_provirus=="No"])/table(viruses$seqtype)

  archaea  bacteria     fungi   plasmid   protist     virus 
0.9842068 0.8948808 0.9263542 0.8298597 0.7198000 0.0210000 
# Per sequence type: share of all sequences with zero CheckV viral genes and
# at least one CheckV host gene.
table(viruses$seqtype[viruses$checkv_viral_genes==0 & viruses$checkv_host_genes>=1])/table(viruses$seqtype)

  archaea  bacteria     fungi   plasmid   protist     virus 
0.8565537 0.6711065 0.2422398 0.4166333 0.0306000 0.0057000 
# Per sequence type: share of all sequences with percent_viral of at least 50.
table(viruses$seqtype[viruses$percent_viral>=50])/table(viruses$seqtype)

    archaea    bacteria       fungi     plasmid     protist       virus 
0.000201187 0.027797951 0.006695070 0.004208417 0.053200000 0.306600000 
# Per sequence type: share of all sequences with percent_unknown of at least 75.
table(viruses$seqtype[viruses$percent_unknown>=75])/table(viruses$seqtype)

   archaea   bacteria      fungi    plasmid    protist      virus 
0.11628609 0.08322388 0.82349361 0.45511022 0.85360000 0.39280000 
# Per sequence type: share of all sequences with percent_unknown of at least 75
# and checkv_length below 50000.
table(viruses$seqtype[viruses$percent_unknown>=75 & viruses$checkv_length<50000])/table(viruses$seqtype)

   archaea   bacteria      fungi    plasmid    protist      virus 
0.10552258 0.08021076 0.34814364 0.25430862 0.83540000 0.25300000 
# Per sequence type: share of all sequences with more than two hallmark genes.
# The denominator is restricted to sequence types that have at least one such
# sequence, so that both tables contain the same entries before dividing.
table(viruses$seqtype[viruses$hallmark>2])/table(viruses$seqtype[viruses$seqtype %in% unique(viruses$seqtype[viruses$hallmark>2])])

    archaea    bacteria     plasmid       virus 
0.008952822 0.045428558 0.055711423 0.576000000 
# Cross-tabulate sequence type (rows) against the Kaiju_Viral label (columns).
table(viruses$seqtype, viruses$Kaiju_Viral)
# One row per contig: keep the sequence type from the first row in which each
# contig name appears, and use the contig names as row names.
is_first_contig <- !duplicated(viruses$contig)
seqdata <- data.frame(seqtype = viruses$seqtype[is_first_contig])
rownames(seqdata) <- viruses$contig[is_first_contig]
library(phyloseq)
# Build a feature-by-sequence matrix and wrap it as a phyloseq OTU table so the
# phyloseq ordination helpers can be applied to it (features play the role of
# taxa, sequences the role of samples).
features_table <- viruses[viruses$Index == 1, ]

# Keep only the numeric feature columns. "checkv_unknown_genes" is not among
# the columns printed by colnames(viruses), so `%in%` silently ignores it.
features_table <- features_table[, colnames(features_table) %in% c(
  "checkv_viral_genes",
  "checkv_host_genes",
  "checkv_unknown_genes",
  "checkv_length",
  "checkv_completeness",
  "checkv_total_genes",
  "percent_host",
  "percent_viral",
  "hallmark",
  "percent_unknown"
)]

# Missing feature values are treated as zero.
features_table[is.na(features_table)] <- 0

# Transpose so that features are rows and sequences are columns.
ft_colnames <- colnames(features_table)
features_table <- t(features_table)
rownames(features_table) <- ft_colnames
# NOTE(review): this labels the columns with the contig names held in
# `seqdata`, which assumes the `Index == 1` rows are the same rows, in the same
# order, as the first occurrence of each contig — confirm.
colnames(features_table) <- rownames(seqdata)

# TRUE is spelled out because `T` is an ordinary variable that can be rebound.
physeq_pooled <- phyloseq(otu_table(features_table, taxa_are_rows = TRUE))
# Principal coordinates analysis (PCoA) of the feature table using Bray
# distances, followed by two ordination plots: one with labels, one without.
# NOTE(review): `physeq_pooled` is built from an OTU table only, and `seqdata`
# only has a `seqtype` column, so the "numtools" and "num_viruses" variables
# and `seqdata$toolcombo` are not defined anywhere in the visible code —
# presumably carried over from another notebook; confirm the intended shape,
# colour and label variables.
ordination <- phyloseq::ordinate(physeq =physeq_pooled, method = "PCoA", distance = "bray")
phyloseq::plot_ordination(physeq = physeq_pooled, ordination = ordination,
                          shape="numtools", color="num_viruses") + 
  geom_point(size = 3) +
  theme_bw() +
  geom_label(label=seqdata$toolcombo)

# Same ordination plot without the per-point labels.
phyloseq::plot_ordination(physeq = physeq_pooled, ordination = ordination,
                          shape="numtools", color="num_viruses") + 
  geom_point(size = 3) +
  theme_bw()

Viral Addition Tuning Rules

# Score every sequence against the four viral-addition tuning rules. Each rule
# column starts at 0 and is set to 0.5 for the sequences where the rule fires;
# `all` is the per-sequence sum of the four rule columns.
viruses_sankey_tv <- data.frame(
  seqtype = viruses$seqtype,
  kj_cel = numeric(nrow(viruses)),
  hall = numeric(nrow(viruses)),
  pv = numeric(nrow(viruses)),
  cvl_pu = numeric(nrow(viruses))
)

# Kaiju labels the sequence "Viruses".
viruses_sankey_tv$kj_cel <- replace(
  viruses_sankey_tv$kj_cel,
  which(viruses$Kaiju_Viral == "Viruses"),
  0.5
)
# More than two hallmark genes.
viruses_sankey_tv$hall <- replace(
  viruses_sankey_tv$hall,
  which(viruses$hallmark > 2),
  0.5
)
# percent_viral of at least 50.
viruses_sankey_tv$pv <- replace(
  viruses_sankey_tv$pv,
  which(viruses$percent_viral >= 50),
  0.5
)
# checkv_length above 50000 together with percent_unknown of at most 75.
viruses_sankey_tv$cvl_pu <- replace(
  viruses_sankey_tv$cvl_pu,
  which(viruses$checkv_length > 50000 & viruses$percent_unknown <= 75),
  0.5
)

# Total score: sum of the four rule columns (columns 2 to 5 of the data frame).
viruses_sankey_tv$all <- rowSums(
  viruses_sankey_tv[, c("kj_cel", "hall", "pv", "cvl_pu")]
)
# Number of sequences of each sequence type (rows) at each total rule score
# `all` (columns). pivot_wider() replaces the superseded spread();
# names_sort = TRUE keeps the score columns in sorted order, as spread() did.
viruses_sankey_tv %>%
  count(seqtype, all) %>%
  pivot_wider(names_from = all, values_from = n, names_sort = TRUE)
# Collapse to one row per sequence type and rule combination, with `n` holding
# the number of sequences, then add the total score as a factor for plotting.
viruses_sankey_tv <- count(
  viruses_sankey_tv,
  seqtype, kj_cel, hall, pv, cvl_pu, all
)
viruses_sankey_tv <- mutate(viruses_sankey_tv, viral_score = factor(all))
# Alluvial plot of the viral-addition rules: one axis per rule, flows weighted
# by the number of sequences `n` and coloured by the total score, one panel per
# sequence type.
# geom_alluvium(), geom_stratum() and the "stratum" stat are provided by
# ggalluvial, which is not attached anywhere else in this notebook, so it is
# loaded here.
library(ggalluvial)

ggplot(viruses_sankey_tv,
       aes(axis1 = kj_cel, axis2 = hall, axis3 = pv, axis4 = cvl_pu, y = n)) +
  geom_alluvium(aes(fill = viral_score),
                width = 0, knot.pos = 0, reverse = FALSE) +
  geom_stratum(width = 1 / 5) +
  theme_bw() +
  # Label each stratum with the rule value it represents.
  geom_text(stat = "stratum", aes(label = after_stat(stratum)),
            reverse = FALSE) +
  theme(
    axis.text.x = element_text(size = 14, angle = 90)
  ) +
  scale_x_continuous(breaks = c(1, 2, 3, 4),
    labels = c("kaiju", "hallmark", "% viral", "% unknown")) +
  facet_wrap(~seqtype, scales = "free_y")

Viral Removal Tuning Rules

# Score every sequence against the viral-removal tuning rules. Each rule column
# starts at 0 and is set to a negative penalty for the sequences where the rule
# fires; `all` is the per-sequence sum of the six rule columns.
viruses_sankey_tnv <- data.frame(seqtype = viruses$seqtype,
                                 kj_cel = rep(0, nrow(viruses)),
                                 hg_pro = rep(0, nrow(viruses)),
                                 vg_hg = rep(0, nrow(viruses)),
                                 vg_hg_pro = rep(0, nrow(viruses)),
                                 cvl_hm = rep(0, nrow(viruses)),
                                 cvl_cp = rep(0, nrow(viruses)))

# `viruses` has no `provirus` column (see the colnames() listing), so testing
# `viruses$provirus == FALSE` yields an empty mask and the two provirus rules
# never fire. The flag lives in the character column `checkv_provirus`, which
# the exploratory tables compare against "No".
# NOTE(review): assumes "No" marks non-provirus sequences — confirm the coding.
not_provirus <- viruses$checkv_provirus == "No"

# Kaiju labels the sequence "cellular organisms".
viruses_sankey_tnv$kj_cel[viruses$Kaiju_Viral == "cellular organisms"] <- -0.5
# More than 50 host genes and not a provirus.
viruses_sankey_tnv$hg_pro[viruses$checkv_host_genes > 50 & not_provirus] <- -1
# No viral genes but at least one host gene.
viruses_sankey_tnv$vg_hg[viruses$checkv_viral_genes == 0 &
                           viruses$checkv_host_genes >= 1] <- -1
# Three times the viral gene count is at most the host gene count, and the
# sequence is not a provirus.
viruses_sankey_tnv$vg_hg_pro[((viruses$checkv_viral_genes * 3) <=
                                viruses$checkv_host_genes) & not_provirus] <- -1
# NOTE(review): this length cutoff is 500000, while the exploratory table and
# the addition rule use 50000 — confirm the extra zero is intended.
viruses_sankey_tnv$cvl_hm[viruses$checkv_length > 500000 &
                            viruses$hallmark <= 1] <- -1
# Longer than 5000 with completeness of at most 75.
viruses_sankey_tnv$cvl_cp[viruses$checkv_length > 5000 &
                            viruses$checkv_completeness <= 75] <- -0.5

# Total score; it must exist before count() groups by it below, otherwise
# `all` would resolve to the base function rather than a column.
viruses_sankey_tnv$all <- rowSums(
  viruses_sankey_tnv[, c("kj_cel", "hg_pro", "vg_hg",
                         "vg_hg_pro", "cvl_hm", "cvl_cp")]
)

# Collapse to one row per sequence type and rule combination, with `n` holding
# the number of sequences, and add the total score as a factor for plotting.
viruses_sankey_tnv <- viruses_sankey_tnv %>%
  count(seqtype, kj_cel, hg_pro, vg_hg, vg_hg_pro, cvl_hm, cvl_cp, all) %>%
  mutate(viral_score = factor(all))

LS0tCnRpdGxlOiAiVGVzdGluZyBTZXQgRmVhdHVyZXMgVmlzdWFsaXphdGlvbiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBub3RlYm9vayBleGFtaW5lcyB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gZGlmZmVyZW50IGZlYXR1cmVzIG9mIHRoZSBkYXRhIGZvciBkaXN0aW5ndWlzaGluZyB2aXJhbCBmcm9tIG5vbnZpcmFsIHNlcXVlbmNlcy4KClBsZWFzZSByZWFjaCBvdXQgdG8gSmFtZXMgUmlkZGVsbCAocmlkZGVsbC4yNkBidWNrZXllbWFpbC5vc3UuZWR1KSBvcgpCcmlkZ2V0IEhlZ2FydHkgKGJlaDUzQGNhc2UuZWR1KSByZWdhcmRpbmcgYW55IGlzc3Vlcywgb3Igb3BlbiBhbiBpc3N1ZSBvbiBnaXRodWIuCgpgYGB7ciBzZXR1cC1saWJyYXJ5fQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkocGx5cikKbGlicmFyeShyZXNoYXBlMikKbGlicmFyeSh2aXJpZGlzKQpsaWJyYXJ5KHRpZHlyKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHJlYWRyKQpsaWJyYXJ5KGRhdGEudGFibGUpCmBgYAoKYGBge3J9CnZpcnVzZXMgPC0gcmVhZF90c3YoIi4uL0ludGVybWVkaWFyeUZpbGVzL3ZpcmFsX3Rvb2xzX2NvbWJpbmVkLnRzdiIpCmBgYAoKYGBge3J9CmNvbG5hbWVzKHZpcnVzZXMpCmBgYAoKYGBge3J9CmdncGxvdCh2aXJ1c2VzLCBhZXMoeD1STkEsIHk9dmlyYWwpKSArCiAgZ2VvbV9oZXgoYmlucyA9IDMwKSArCiAgc2NhbGVfZmlsbF9jb250aW51b3VzKHR5cGUgPSAidmlyaWRpcyIsIHRyYW5zPSJsb2cxMCIpICsKICB0aGVtZV9idygpICsKICBmYWNldF93cmFwKH5zZXF0eXBlLCBzY2FsZXMgPSAiZnJlZSIpICsKICB4bGFiKCJSTkEgVlMyIFNjb3JlIikgKwogIHlsYWIoIlZTMiBWaXJhbCBTY29yZSIpCgpnZ3Bsb3QodmlydXNlcywgYWVzKHg9Uk5BLCB5PXZpcmFsKSkgKwogIGdlb21faGV4KGJpbnMgPSAzMCkgKwogIHNjYWxlX2ZpbGxfY29udGludW91cyh0eXBlID0gInZpcmlkaXMiLCB0cmFucz0ibG9nMTAiKSArCiAgdGhlbWVfYncoKSArCiAgZmFjZXRfd3JhcCh+Y29uZnVzaW9uX21hdHJpeF9oaWdoX01DQywgc2NhbGVzID0gImZyZWUiKSArCiAgeGxhYigiUk5BIFZTMiBTY29yZSIpICsKICB5bGFiKCJWUzIgVmlyYWwgU2NvcmUiKQoKZ2dwbG90KHZpcnVzZXMsIGFlcyh4PU5DTERWLCB5PXZpcmFsKSkgKwogIGdlb21faGV4KGJpbnMgPSAzMCkgKwogIHNjYWxlX2ZpbGxfY29udGludW91cyh0eXBlID0gInZpcmlkaXMiLCB0cmFucz0ibG9nMTAiKSArCiAgdGhlbWVfYncoKSArCiAgZmFjZXRfd3JhcCh+c2VxdHlwZSwgc2NhbGVzID0gImZyZWUiKSArCiAgeGxhYigiTkNMRFYgVlMyIFNjb3JlIikgKwogIHlsYWIoIlZTMiBWaXJhbCBTY29yZSIpCgpnZ3Bsb3QodmlydXNlcywgYWVzKHg9TkNMRFYsIHk9dmlyYWwpKSArCiAgZ2VvbV9oZXgoYmlucyA9IDMwKSArCiAgc2NhbGVfZmlsbF9jb250aW51b3VzKHR5cGUgPSAidmlyaWRpcyIsIHRyYW5zPSJsb2cxMCIpICsKICB0aGVtZV9idygpICsKICBmYWNldF93
cmFwKH5jb25mdXNpb25fbWF0cml4X2hpZ2hfTUNDLCBzY2FsZXMgPSAiZnJlZSIpICsKICB4bGFiKCJOQ0xEViBWUzIgU2NvcmUiKSArCiAgeWxhYigiVlMyIFZpcmFsIFNjb3JlIikKCmdncGxvdCh2aXJ1c2VzLCBhZXMoeD1sYXZpZGF2aXJpZGFlLCB5PXZpcmFsKSkgKwogIGdlb21faGV4KGJpbnMgPSAzMCkgKwogIHNjYWxlX2ZpbGxfY29udGludW91cyh0eXBlID0gInZpcmlkaXMiLCB0cmFucz0ibG9nMTAiKSArCiAgdGhlbWVfYncoKSArCiAgZmFjZXRfd3JhcCh+c2VxdHlwZSwgc2NhbGVzID0gImZyZWUiKSArCiAgeGxhYigiTGF2aWRhdmlyaWRhZSBWUzIgU2NvcmUiKSArCiAgeWxhYigiVlMyIFZpcmFsIFNjb3JlIikKCmdncGxvdCh2aXJ1c2VzLCBhZXMoeD1sYXZpZGF2aXJpZGFlLCB5PXZpcmFsKSkgKwogIGdlb21faGV4KGJpbnMgPSAzMCkgKwogIHNjYWxlX2ZpbGxfY29udGludW91cyh0eXBlID0gInZpcmlkaXMiLCB0cmFucz0ibG9nMTAiKSArCiAgdGhlbWVfYncoKSArCiAgZmFjZXRfd3JhcCh+Y29uZnVzaW9uX21hdHJpeF9oaWdoX01DQywgc2NhbGVzID0gImZyZWUiKSArCiAgeGxhYigiTGF2aWRhdmlyaWRhZSBWUzIgU2NvcmUiKSArCiAgeWxhYigiVlMyIFZpcmFsIFNjb3JlIikKYGBgCgojIEltcG9ydGFudCBmZWF0dXJlcyBieSBzZXF1ZW5jZSB0eXBlCgpgYGB7cn0KcGFsIDwtIGdndGhlbWVzOjp0YWJsZWF1X2NvbG9yX3BhbChwYWxldHRlPSJUYWJsZWF1IDEwIiwgdHlwZT0icmVndWxhciIpCmBgYAoKYGBge3J9CmdncGxvdCh2aXJ1c2VzLCBhZXMoeD1oYWxsbWFyaywgeT1jaGVja3ZfdmlyYWxfZ2VuZXMpKSArCiAgZ2VvbV9oZXgoYmlucyA9IDMwKSArCiAgc2NhbGVfZmlsbF9jb250aW51b3VzKHR5cGUgPSAidmlyaWRpcyIsIHRyYW5zPSJsb2cxMCIpICsKICB0aGVtZV9idygpICsKICBmYWNldF93cmFwKH5zZXF0eXBlLCBzY2FsZXMgPSAiZnJlZSIpICsKICB4bGFiKCJOdW1iZXIgb2YgSGFsbG1hcmsgR2VuZXMiKSArCiAgeWxhYigiTnVtYmVyIG9mIFZpcmFsIEdlbmVzIikKYGBgCgpgYGB7cn0KZ2dwbG90KHZpcnVzZXMsIGFlcyh4PWNoZWNrdl9ob3N0X2dlbmVzLCB5PWNoZWNrdl92aXJhbF9nZW5lcykpICsKICBnZW9tX2hleChiaW5zID0gMzApICsKICBzY2FsZV9maWxsX2NvbnRpbnVvdXModHlwZSA9ICJ2aXJpZGlzIiwgdHJhbnM9ImxvZzEwIikgKwogIHRoZW1lX2J3KCkgKwogIGZhY2V0X3dyYXAofnNlcXR5cGUsIHNjYWxlcyA9ICJmcmVlIikgKwogIHhsYWIoIk51bWJlciBvZiBIb3N0IEdlbmVzIikgKwogIHlsYWIoIk51bWJlciBvZiBWaXJhbCBHZW5lcyIpCmBgYAoKYGBge3J9CmdncGxvdCh2aXJ1c2VzLCBhZXMoeD1wZXJjZW50X3Vua25vd24sIHk9cGVyY2VudF92aXJhbCkpICsKICBnZW9tX2hleChiaW5zID0gMzApICsKICBzY2FsZV9maWxsX2NvbnRpbnVvdXModHlwZSA9ICJ2aXJpZGlzIiwgdHJhbnM9ImxvZzEwIikgKwogIHRoZW1lX2J3KCkgKwogIGZhY2V0X3dyYXAofnNlcXR5
cGUsIHNjYWxlcyA9ICJmcmVlIikgKwogIHhsYWIoIlBlcmNlbnRhZ2Ugb2YgR2VuZXMgVW5rbm93biIpICsKICB5bGFiKCJQZXJjZW50YWdlIG9mIEdlbmVzIFZpcmFsIikKYGBgCgpgYGB7cn0KZ2dwbG90KHZpcnVzZXMsIGFlcyh4PXBlcmNlbnRfdW5rbm93biwgeT1jaGVja3ZfbGVuZ3RoKSkgKwogIGdlb21faGV4KGJpbnMgPSAzMCkgKwogIHNjYWxlX2ZpbGxfY29udGludW91cyh0eXBlID0gInZpcmlkaXMiLCB0cmFucz0ibG9nMTAiKSArCiAgdGhlbWVfYncoKSArCiAgZmFjZXRfd3JhcCh+c2VxdHlwZSwgc2NhbGVzID0gImZyZWUiKSArCiAgeGxhYigiUGVyY2VudGFnZSBvZiBHZW5lcyBVbmtub3duIikgKwogIHlsYWIoIkxlbmd0aCBvZiBTZXF1ZW5jZSIpCmBgYAoKYGBge3J9CmdncGxvdCh2aXJ1c2VzLCBhZXMoeD1wZXJjZW50X3ZpcmFsLCB5PWNoZWNrdl9sZW5ndGgpKSArCiAgZ2VvbV9oZXgoYmlucyA9IDMwKSArCiAgc2NhbGVfZmlsbF9jb250aW51b3VzKHR5cGUgPSAidmlyaWRpcyIsIHRyYW5zPSJsb2cxMCIpICsKICB0aGVtZV9idygpICsKICBmYWNldF93cmFwKH5zZXF0eXBlLCBzY2FsZXMgPSAiZnJlZSIpICsKICB4bGFiKCJQZXJjZW50YWdlIG9mIEdlbmVzIFZpcmFsIikgKwogIHlsYWIoIkxlbmd0aCBvZiBTZXF1ZW5jZSIpIApgYGAKCmBgYHtyfQpnZ3Bsb3QodmlydXNlcywgYWVzKHg9aGFsbG1hcmssIHk9Y2hlY2t2X2xlbmd0aCkpICsKICBnZW9tX2hleChiaW5zID0gMzApICsKICBzY2FsZV9maWxsX2NvbnRpbnVvdXModHlwZSA9ICJ2aXJpZGlzIiwgdHJhbnM9ImxvZzEwIikgKwogIHRoZW1lX2J3KCkgKwogIGZhY2V0X3dyYXAofnNlcXR5cGUsIHNjYWxlcyA9ICJmcmVlIikgKwogIHhsYWIoIk51bWJlciBvZiBIYWxsbWFyayBHZW5lcyIpICsKICB5bGFiKCJMZW5ndGggb2YgU2VxdWVuY2UiKSAKYGBgCgpgYGB7cn0KZ2dwbG90KHZpcnVzZXMsIGFlcyh4PWNoZWNrdl9ob3N0X2dlbmVzLCB5PWNoZWNrdl9sZW5ndGgpKSArCiAgZ2VvbV9oZXgoYmlucyA9IDMwKSArCiAgc2NhbGVfZmlsbF9jb250aW51b3VzKHR5cGUgPSAidmlyaWRpcyIsIHRyYW5zPSJsb2cxMCIpICsKICB0aGVtZV9idygpICsKICBmYWNldF93cmFwKH5zZXF0eXBlLCBzY2FsZXMgPSAiZnJlZSIpICsKICB4bGFiKCJMZW5ndGggb2YgU2VxdWVuY2UiKSArCiAgeWxhYigiTnVtYmVyIG9mIEhvc3QgR2VuZXMiKQpgYGAKYGBge3J9CnRhYmxlKHZpcnVzZXMkY2hlY2t2X2hvc3RfZ2VuZXM+PTUwLCB2aXJ1c2VzJHNlcXR5cGUpCmBgYAoKCgpgYGB7cn0KZ2dwbG90KHZpcnVzZXMsIGFlcyh4PWNoZWNrdl9sZW5ndGgsIHk9Y2hlY2t2X2NvbXBsZXRlbmVzcykpICsKICBnZW9tX2hleChiaW5zID0gMzApICsKICBzY2FsZV9maWxsX2NvbnRpbnVvdXModHlwZSA9ICJ2aXJpZGlzIiwgdHJhbnM9ImxvZzEwIikgKwogIHRoZW1lX2J3KCkgKwogIGZhY2V0X3dyYXAofnNlcXR5cGUsIHNjYWxlcyA9ICJmcmVlIikgKwogIHhsYWIoIkxlbmd0aCIp
ICsKICB5bGFiKCJDb21wbGV0ZW5lc3MiKSAKYGBgCgpgYGB7cn0KZ2dwbG90KHZpcnVzZXMsIGFlcyh4PWhhbGxtYXJrLCB5PWNoZWNrdl9jb21wbGV0ZW5lc3MpKSArCiAgZ2VvbV9oZXgoYmlucyA9IDMwKSArCiAgc2NhbGVfZmlsbF9jb250aW51b3VzKHR5cGUgPSAidmlyaWRpcyIsIHRyYW5zPSJsb2cxMCIpICsKICB0aGVtZV9idygpICsKICBmYWNldF93cmFwKH5zZXF0eXBlLCBzY2FsZXMgPSAiZnJlZSIpICsKICB4bGFiKCJIYWxsbWFyayBHZW5lcyIpICsKICB5bGFiKCJDb21wbGV0ZW5lc3MiKSAKYGBgCgpgYGB7cn0KdGFibGUodmlydXNlcyRzZXF0eXBlW3ZpcnVzZXMkY2hlY2t2X2xlbmd0aD41MDAwMCAmIHZpcnVzZXMkaGFsbG1hcms9PTBdKS90YWJsZSh2aXJ1c2VzJHNlcXR5cGUpCnRhYmxlKHZpcnVzZXMkc2VxdHlwZVsoKHZpcnVzZXMkY2hlY2t2X3ZpcmFsX2dlbmVzKjMpIDw9IHZpcnVzZXMkY2hlY2t2X2hvc3RfZ2VuZXMpICYgdmlydXNlcyRjaGVja3ZfcHJvdmlydXM9PSJObyJdKS90YWJsZSh2aXJ1c2VzJHNlcXR5cGUpCnRhYmxlKHZpcnVzZXMkc2VxdHlwZVt2aXJ1c2VzJGNoZWNrdl92aXJhbF9nZW5lcz09MCAmIHZpcnVzZXMkY2hlY2t2X2hvc3RfZ2VuZXM+PTFdKS90YWJsZSh2aXJ1c2VzJHNlcXR5cGUpCgp0YWJsZSh2aXJ1c2VzJHNlcXR5cGVbdmlydXNlcyRwZXJjZW50X3ZpcmFsPj01MF0pL3RhYmxlKHZpcnVzZXMkc2VxdHlwZSkKdGFibGUodmlydXNlcyRzZXF0eXBlW3ZpcnVzZXMkcGVyY2VudF91bmtub3duPj03NV0pL3RhYmxlKHZpcnVzZXMkc2VxdHlwZSkKdGFibGUodmlydXNlcyRzZXF0eXBlW3ZpcnVzZXMkcGVyY2VudF91bmtub3duPj03NSAmIHZpcnVzZXMkY2hlY2t2X2xlbmd0aDw1MDAwMF0pL3RhYmxlKHZpcnVzZXMkc2VxdHlwZSkKdGFibGUodmlydXNlcyRzZXF0eXBlW3ZpcnVzZXMkaGFsbG1hcms+Ml0pL3RhYmxlKHZpcnVzZXMkc2VxdHlwZVt2aXJ1c2VzJHNlcXR5cGUgJWluJSB1bmlxdWUodmlydXNlcyRzZXF0eXBlW3ZpcnVzZXMkaGFsbG1hcms+Ml0pXSkKYGBgCgpgYGB7cn0KdGFibGUodmlydXNlcyRzZXF0eXBlLCB2aXJ1c2VzJEthaWp1X1ZpcmFsKQpgYGAKCmBgYHtyfQpzZXFkYXRhIDwtIGRhdGEuZnJhbWUoc2VxdHlwZT12aXJ1c2VzJHNlcXR5cGVbIWR1cGxpY2F0ZWQodmlydXNlcyRjb250aWcpXSkKCnJvd25hbWVzKHNlcWRhdGEpIDwtIHZpcnVzZXMkY29udGlnWyFkdXBsaWNhdGVkKHZpcnVzZXMkY29udGlnKV0KYGBgCgpgYGB7cn0KbGlicmFyeShwaHlsb3NlcSkKYGBgCgoKYGBge3J9CmZlYXR1cmVzX3RhYmxlIDwtIHZpcnVzZXNbdmlydXNlcyRJbmRleD09MSxdCmZlYXR1cmVzX3RhYmxlIDwtIGZlYXR1cmVzX3RhYmxlWyxjb2xuYW1lcyhmZWF0dXJlc190YWJsZSkgJWluJSBjKCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJjaGVja3ZfdmlyYWxfZ2VuZXMiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgImNoZWNrdl9ob3N0X2dlbmVzIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJjaGVja3ZfdW5rbm93bl9nZW5lcyIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiY2hlY2t2X2xlbmd0aCIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiY2hlY2t2X2NvbXBsZXRlbmVzcyIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAiY2hlY2t2X3RvdGFsX2dlbmVzIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJwZXJjZW50X2hvc3QiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgInBlcmNlbnRfdmlyYWwiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgImhhbGxtYXJrIiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJwZXJjZW50X3Vua25vd24iCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICApXQoKZmVhdHVyZXNfdGFibGVbaXMubmEoZmVhdHVyZXNfdGFibGUpXSA8LSAwCmZ0X2NvbG5hbWVzIDwtIGNvbG5hbWVzKGZlYXR1cmVzX3RhYmxlKQpmZWF0dXJlc190YWJsZSA8LSB0KGZlYXR1cmVzX3RhYmxlKQpyb3duYW1lcyhmZWF0dXJlc190YWJsZSkgPC0gZnRfY29sbmFtZXMKY29sbmFtZXMoZmVhdHVyZXNfdGFibGUpIDwtIHJvd25hbWVzKHNlcWRhdGEpCgpwaHlzZXFfcG9vbGVkIDwtIHBoeWxvc2VxKG90dV90YWJsZShmZWF0dXJlc190YWJsZSwgdGF4YV9hcmVfcm93cyA9IFQpKQpgYGAKCmBgYHtyfQpvcmRpbmF0aW9uIDwtIHBoeWxvc2VxOjpvcmRpbmF0ZShwaHlzZXEgPXBoeXNlcV9wb29sZWQsIG1ldGhvZCA9ICJQQ29BIiwgZGlzdGFuY2UgPSAiYnJheSIpCnBoeWxvc2VxOjpwbG90X29yZGluYXRpb24ocGh5c2VxID0gcGh5c2VxX3Bvb2xlZCwgb3JkaW5hdGlvbiA9IG9yZGluYXRpb24sCiAgICAgICAgICAgICAgICAgICAgICAgICAgc2hhcGU9Im51bXRvb2xzIiwgY29sb3I9Im51bV92aXJ1c2VzIikgKyAKICBnZW9tX3BvaW50KHNpemUgPSAzKSArCiAgdGhlbWVfYncoKSArCiAgZ2VvbV9sYWJlbChsYWJlbD1zZXFkYXRhJHRvb2xjb21ibykKCnBoeWxvc2VxOjpwbG90X29yZGluYXRpb24ocGh5c2VxID0gcGh5c2VxX3Bvb2xlZCwgb3JkaW5hdGlvbiA9IG9yZGluYXRpb24sCiAgICAgICAgICAgICAgICAgICAgICAgICAgc2hhcGU9Im51bXRvb2xzIiwgY29sb3I9Im51bV92aXJ1c2VzIikgKyAKICBnZW9tX3BvaW50KHNpemUgPSAzKSArCiAgdGhlbWVfYncoKQpgYGAKCiMgVmlyYWwgQWRkaXRpb24gVHVuaW5nIFJ1bGVzCgpgYGB7cn0KdmlydXNlc19zYW5rZXlfdHYgPC0gZGF0YS5mcmFtZShzZXF0eXBlPXZpcnVzZXMkc2VxdHlwZSwKICAgICAgICAgICAgICAg
ICAgICAgICAgICAgICBral9jZWw9cmVwKDAsbnJvdyh2aXJ1c2VzKSksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgaGFsbD1yZXAoMCxucm93KHZpcnVzZXMpKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBwdj1yZXAoMCxucm93KHZpcnVzZXMpKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjdmxfcHU9cmVwKDAsIG5yb3codmlydXNlcykpKQpgYGAKCmBgYHtyfQp2aXJ1c2VzX3NhbmtleV90diRral9jZWxbdmlydXNlcyRLYWlqdV9WaXJhbD09IlZpcnVzZXMiXSA8LSAwLjUKdmlydXNlc19zYW5rZXlfdHYkaGFsbFt2aXJ1c2VzJGhhbGxtYXJrPjJdIDwtIDAuNQp2aXJ1c2VzX3NhbmtleV90diRwdlt2aXJ1c2VzJHBlcmNlbnRfdmlyYWw+PTUwXSA8LSAwLjUKdmlydXNlc19zYW5rZXlfdHYkY3ZsX3B1W3ZpcnVzZXMkY2hlY2t2X2xlbmd0aD41MDAwMCAmIHZpcnVzZXMkcGVyY2VudF91bmtub3duPD03NV0gPC0gMC41ICAgIAoKdmlydXNlc19zYW5rZXlfdHYkYWxsIDwtIHJvd1N1bXModmlydXNlc19zYW5rZXlfdHZbLDI6NV0pCmBgYAoKYGBge3J9CnZpcnVzZXNfc2Fua2V5X3R2ICU+JQogIGNvdW50KHNlcXR5cGUsIGFsbCkgJT4lIHNwcmVhZChrZXkgPSBhbGwsIHZhbHVlPW4pCmBgYAoKYGBge3J9CnZpcnVzZXNfc2Fua2V5X3R2IDwtIHZpcnVzZXNfc2Fua2V5X3R2ICU+JQogIGNvdW50KHNlcXR5cGUsIGtqX2NlbCwgaGFsbCwgcHYsIGN2bF9wdSwgYWxsKSAlPiUKICBtdXRhdGUodmlyYWxfc2NvcmU9ZmFjdG9yKGFsbCkpCmBgYAoKYGBge3J9CmdncGxvdCh2aXJ1c2VzX3NhbmtleV90diwKICAgICAgIGFlcyhheGlzMSA9IGtqX2NlbCwgYXhpczIgPSBoYWxsLCBheGlzMyA9IHB2LCBheGlzNCA9IGN2bF9wdSwgeT1uKSkgKwogIGdlb21fYWxsdXZpdW0oYWVzKGZpbGw9dmlyYWxfc2NvcmUpLAogICAgICAgICAgICAgICAgd2lkdGggPSAwLCBrbm90LnBvcyA9IDAsIHJldmVyc2UgPSBGQUxTRSkgKwogIGdlb21fc3RyYXR1bSh3aWR0aCA9IDEvNSkgKwogIHRoZW1lX2J3KCkgKwogIGdlb21fdGV4dChzdGF0ID0gInN0cmF0dW0iLCBhZXMobGFiZWwgPSBhZnRlcl9zdGF0KHN0cmF0dW0pKSwKICAgICAgICAgICAgcmV2ZXJzZSA9IEZBTFNFKSArCiAgdGhlbWUoCiAgICAgICAgYXhpcy50ZXh0Lng9ZWxlbWVudF90ZXh0KHNpemU9MTQsIGFuZ2xlID0gOTApCiAgICAgICAgKSArCiAgc2NhbGVfeF9jb250aW51b3VzKGJyZWFrcz1jKDEsMiwzLDQpLAogICAgbGFiZWxzPWMoImthaWp1IiwgImhhbGxtYXJrIiwgIiUgdmlyYWwiLCAiJSB1bmtub3duIikpICsKICBmYWNldF93cmFwKH5zZXF0eXBlLCBzY2FsZXM9ImZyZWVfeSIpIApgYGAgCgojIFZpcmFsIFJlbW92YWwgVHVuaW5nIFJ1bGVzCgpgYGB7cn0KdmlydXNlc19zYW5rZXlfdG52IDwtIGRhdGEuZnJhbWUoc2VxdHlwZT12aXJ1c2VzJHNlcXR5cGUsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAga2pfY2VsPXJlcCgwLG5yb3codmlydXNl
cykpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIGhnX3Bybz1yZXAoMCxucm93KHZpcnVzZXMpKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICB2Z19oZz1yZXAoMCxucm93KHZpcnVzZXMpKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICB2Z19oZ19wcm89cmVwKDAsIG5yb3codmlydXNlcykpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIGN2bF9obT1yZXAoMCwgbnJvdyh2aXJ1c2VzKSksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgY3ZsX2NwPXJlcCgwLG5yb3codmlydXNlcykpKQpgYGAKCmBgYHtyfQp2aXJ1c2VzX3NhbmtleV90bnYka2pfY2VsW3ZpcnVzZXMkS2FpanVfVmlyYWw9PSJjZWxsdWxhciBvcmdhbmlzbXMiXSA8LSAtMC41CnZpcnVzZXNfc2Fua2V5X3RudiRoZ19wcm9bdmlydXNlcyRjaGVja3ZfaG9zdF9nZW5lcz41MCAmIHZpcnVzZXMkcHJvdmlydXM9PUZBTFNFXSA8LSAtMQp2aXJ1c2VzX3NhbmtleV90bnYkdmdfaGdbdmlydXNlcyRjaGVja3ZfdmlyYWxfZ2VuZXM9PTAgJiB2aXJ1c2VzJGNoZWNrdl9ob3N0X2dlbmVzPj0xXSA8LSAtMQp2aXJ1c2VzX3NhbmtleV90bnYkdmdfaGdfcHJvWygodmlydXNlcyRjaGVja3ZfdmlyYWxfZ2VuZXMqMykgPD0gdmlydXNlcyRjaGVja3ZfaG9zdF9nZW5lcykgJiB2aXJ1c2VzJHByb3ZpcnVzPT1GQUxTRV0gPC0gLTEKdmlydXNlc19zYW5rZXlfdG52JGN2bF9obVt2aXJ1c2VzJGNoZWNrdl9sZW5ndGg+NTAwMDAwICYgdmlydXNlcyRoYWxsbWFyazw9MV0gPC0gLTEKdmlydXNlc19zYW5rZXlfdG52JGN2bF9jcFt2aXJ1c2VzJGNoZWNrdl9sZW5ndGg+NTAwMCAmIHZpcnVzZXMkY2hlY2t2X2NvbXBsZXRlbmVzczw9NzVdIDwtIC0wLjUgICAgICAgICAgICAgCnZpcnVzZXNfc2Fua2V5X3RudiRhbGwgPC0gcm93U3Vtcyh2aXJ1c2VzX3NhbmtleV90bnZbLDI6N10pCmBgYAoKYGBge3J9CnZpcnVzZXNfc2Fua2V5X3RudiAlPiUKICBjb3VudChzZXF0eXBlLCBhbGwpICU+JSBzcHJlYWQoa2V5ID0gYWxsLCB2YWx1ZT1uKQpgYGAKCgpgYGB7cn0KdmlydXNlc19zYW5rZXlfdG52IDwtIHZpcnVzZXNfc2Fua2V5X3RudiAlPiUKICBjb3VudChzZXF0eXBlLCBral9jZWwsIGhnX3BybywgdmdfaGcsIHZnX2hnX3BybywgY3ZsX2htLCBjdmxfY3AsIGFsbCkgJT4lCiAgbXV0YXRlKHZpcmFsX3Njb3JlPWZhY3RvcihhbGwpKQpgYGAKCmBgYHtyfQpnZ3Bsb3QodmlydXNlc19zYW5rZXlfdG52LAogICAgICAgYWVzKGF4aXMxID0ga2pfY2VsLCBheGlzMiA9IGhnX3BybywgYXhpczMgPSB2Z19oZywgYXhpczQgPSBjdmxfaG0sIGF4aXM1PWN2bF9obSwgYXhpczY9Y3ZsX2NwLCAKICAgICAgICAgICB5PW4pKSArCiAgZ2VvbV9hbGx1dml1bShhZXMoZmlsbD12aXJhbF9zY29yZSksCiAgICAgICAgICAgICAgICB3aWR0aCA9IDAsIGtub3QucG9zID0gMCwgcmV2ZXJzZSA9IEZBTFNFKSArCiAgZ2VvbV9zdHJhdHVtKHdpZHRoID0gMS81KSArCiAgdGhlbWVf
YncoKSArCiAgZ2VvbV90ZXh0KHN0YXQgPSAic3RyYXR1bSIsIGFlcyhsYWJlbCA9IGFmdGVyX3N0YXQoc3RyYXR1bSkpLAogICAgICAgICAgICByZXZlcnNlID0gRkFMU0UpICsKICB0aGVtZSgKICAgICAgICBheGlzLnRleHQueD1lbGVtZW50X3RleHQoc2l6ZT0xNCwgYW5nbGUgPSA5MCkKICAgICAgICApICsKICBzY2FsZV94X2NvbnRpbnVvdXMoYnJlYWtzPWMoMSwyLDMsNCw1LDYpLAogICAgbGFiZWxzPWMoImthaWp1IiwgImhvc3QgZ2VuZXMiLCAidmlyYWwgYW5kIGhvc3QiLCAicHJvdmlydXMiLAogICAgICAgICAgICAgImhhbGxtYXJrIiwgImNvbXBsZXRlIikpICsKICBmYWNldF93cmFwKH5zZXF0eXBlLCBzY2FsZXM9ImZyZWVfeSIpIApgYGAgCgoKCgoKCgoKCgoKCgo=